library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.1     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(patchwork)

Load the weather dataset

# Pull the 2017 daily PRCP / TMIN / TMAX records for three NOAA stations,
# attach a readable station name, and divide both temperature columns by 10
# (presumably tenths of a degree -> degrees C; confirm against the rnoaa docs).
weather_df =
  rnoaa::meteo_pull_monitors(
    c("USW00094728", "USC00519397", "USS0023B17S"),
    var = c("PRCP", "TMIN", "TMAX"),
    date_min = "2017-01-01",
    date_max = "2017-12-31"
  ) %>%
  mutate(
    # Map each station id onto the label used in every plot legend.
    name = recode(
      id,
      USW00094728 = "CentralPark_NY",
      USC00519397 = "Waikiki_HA",
      USS0023B17S = "Waterhole_WA"
    ),
    tmin = tmin / 10,
    tmax = tmax / 10
  ) %>%
  # Move the identifying columns to the front; the rest keep their order.
  relocate(name, id)
## Registered S3 method overwritten by 'hoardr':
##   method           from
##   print.cache_info httr
## using cached file: /Users/yiming/Library/Caches/R/noaa_ghcnd/USW00094728.dly
## date created (size, mb): 2020-10-07 16:20:31 (7.525)
## file min/max dates: 1869-01-01 / 2020-10-31
## using cached file: /Users/yiming/Library/Caches/R/noaa_ghcnd/USC00519397.dly
## date created (size, mb): 2020-10-07 16:20:49 (1.699)
## file min/max dates: 1965-01-01 / 2020-03-31
## using cached file: /Users/yiming/Library/Caches/R/noaa_ghcnd/USS0023B17S.dly
## date created (size, mb): 2020-10-07 16:21:03 (0.88)
## file min/max dates: 1999-09-01 / 2020-10-31
# Auto-print the tibble to check the station names and the rescaled columns.
weather_df
## # A tibble: 1,095 x 6
##    name           id          date        prcp  tmax  tmin
##    <chr>          <chr>       <date>     <dbl> <dbl> <dbl>
##  1 CentralPark_NY USW00094728 2017-01-01     0   8.9   4.4
##  2 CentralPark_NY USW00094728 2017-01-02    53   5     2.8
##  3 CentralPark_NY USW00094728 2017-01-03   147   6.1   3.9
##  4 CentralPark_NY USW00094728 2017-01-04     0  11.1   1.1
##  5 CentralPark_NY USW00094728 2017-01-05     0   1.1  -2.7
##  6 CentralPark_NY USW00094728 2017-01-06    13   0.6  -3.8
##  7 CentralPark_NY USW00094728 2017-01-07    81  -3.2  -6.6
##  8 CentralPark_NY USW00094728 2017-01-08     0  -3.8  -8.8
##  9 CentralPark_NY USW00094728 2017-01-09     0  -4.9  -9.9
## 10 CentralPark_NY USW00094728 2017-01-10     0   7.8  -6  
## # … with 1,085 more rows

Remember this plot?

# Baseline scatterplot: daily maximum against daily minimum temperature,
# one colour per station, semi-transparent points to reduce overplotting.
ggplot(weather_df, aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.5)
## Warning: Removed 15 rows containing missing values (geom_point).

Labels

# Scatterplot with a title, descriptive axis labels, and a caption.
# `labs()` only draws a plot title when the argument is named `title`.
weather_df %>% 
  ggplot(aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature(C)",
    y = "Maximum daily temperature(C)",
    caption = "Data from rnoaa package; temperature in 2017."
  )
## Warning: Removed 15 rows containing missing values (geom_point).

Scales

Start with the same plot; x and y scales

# Labelled scatterplot with customised x and y scales.
# `labs()` only draws a plot title when the argument is named `title`.
weather_df %>% 
  ggplot(aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature(C)",
    y = "Maximum daily temperature(C)",
    caption = "Data from rnoaa package; temperature in 2017."
  ) +
  # Explicit tick positions on the x axis, each with its own label.
  scale_x_continuous(
    breaks = c(-15, 0 ,15),
    labels = c("-15 C", "0", "15")
    )+ 
  # Put the y axis on the right and log-transform it. The log of a
  # non-positive tmax is NaN or infinite, so those points are dropped with
  # warnings when the plot is drawn.
  scale_y_continuous(
    position = "right",
    trans = "log"
  )
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 90 rows containing missing values (geom_point).

Look at color scales

# scale_color_hue() changes the range of hues used for the colour variable;
# its `name` argument sets the legend title shown for that variable.
# `labs()` only draws a plot title when the argument is named `title`.
weather_df %>% 
  ggplot(aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature(C)",
    y = "Maximum daily temperature(C)",
    caption = "Data from rnoaa package; temperature in 2017."
  ) +
  scale_color_hue(
    name = "Location",
    h = c(100, 300))
## Warning: Removed 15 rows containing missing values (geom_point).

# Same labelled scatterplot, coloured with the discrete viridis palette and a
# legend titled "Location".
# `labs()` only draws a plot title when the argument is named `title`.
weather_df %>% 
  ggplot(aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature(C)",
    y = "Maximum daily temperature(C)",
    caption = "Data from rnoaa package; temperature in 2017."
  ) +
  viridis::scale_color_viridis(
    name = "Location",
    discrete = TRUE)
## Warning: Removed 15 rows containing missing values (geom_point).

Themes

Shift the legend

# Viridis-coloured scatterplot with the legend moved below the panel.
# `labs()` only draws a plot title when the argument is named `title`.
weather_df %>% 
  ggplot(aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature(C)",
    y = "Maximum daily temperature(C)",
    caption = "Data from rnoaa package; temperature in 2017."
  ) +
  viridis::scale_color_viridis(
    name = "Location",
    discrete = TRUE) +
  theme(
    legend.position = "bottom"
  )
## Warning: Removed 15 rows containing missing values (geom_point).

Change the overall theme.

# theme(legend.position = ...) has to come after a complete theme such as
# theme_bw(), which changes the overall plot background.
# `labs()` only draws a plot title when the argument is named `title`.
weather_df %>% 
  ggplot(aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature(C)",
    y = "Maximum daily temperature(C)",
    caption = "Data from rnoaa package; temperature in 2017."
  ) +
  viridis::scale_color_viridis(
    name = "Location",
    discrete = TRUE) +
  theme_bw()+
  theme(
    legend.position = "bottom"
  )
## Warning: Removed 15 rows containing missing values (geom_point).

# Same labelled, viridis-coloured scatterplot drawn with the ggthemes
# Excel theme.
# `labs()` only draws a plot title when the argument is named `title`.
weather_df %>% 
  ggplot(aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature(C)",
    y = "Maximum daily temperature(C)",
    caption = "Data from rnoaa package; temperature in 2017."
  ) +
  viridis::scale_color_viridis(
    name = "Location",
    discrete = TRUE) +
  ggthemes::theme_excel()
## Warning: Removed 15 rows containing missing values (geom_point).

Setting options

放在每个Rmarkdown文件最开始的设定如下

library(tidyverse)

# Default figure width, aspect ratio, and output width for knitted chunks.
knitr::opts_chunk$set(fig.width = 6, fig.asp = 0.6, out.width = "90%")

# Session-wide theme: minimal styling with the legend below the panel.
theme_set(
  theme_minimal() +
    theme(legend.position = "bottom")
)

# Continuous colour and fill scales default to viridis.
options(
  ggplot2.continuous.colour = "viridis",
  ggplot2.continuous.fill   = "viridis"
)

# Rebind the discrete colour / fill scale names to their viridis counterparts.
scale_colour_discrete = scale_colour_viridis_d
scale_fill_discrete   = scale_fill_viridis_d

Data args in `geom`

# Rows for the Central Park station only.
central_park = filter(weather_df, name == "CentralPark_NY")

# Rows for the Waikiki station only.
waikiki = filter(weather_df, name == "Waikiki_HA")

# How to combine a scatterplot and a line plot in one figure: the points use
# the plot-level data (waikiki), while the line layer is handed its own data
# frame (central_park) through the geom's `data` argument.
ggplot(waikiki, aes(x = date, y = tmax, color = name)) +
  geom_point() +
  geom_line(data = central_park)
## Warning: Removed 3 rows containing missing values (geom_point).

patchwork

Remember faceting?

# Density of daily minimum temperature, one panel (column) per station.
ggplot(weather_df, aes(x = tmin, fill = name)) +
  geom_density(alpha = 0.5) +
  facet_grid(cols = vars(name))
## Warning: Removed 15 rows containing non-finite values (stat_density).

What happens when you want multipanel plots but can’t facet..?

# When faceting is not an option, build each panel as a separate plot object
# so the panels can be combined on one page afterwards.
# Panel 1: tmax against tmin, legend hidden.
tmax_tmin_p =
  ggplot(weather_df, aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.5) +
  theme(legend.position = "none")

# Panel 2: precipitation density per station, restricted to rows with
# prcp > 0. This panel keeps its legend.
prcp_dens_p =
  ggplot(filter(weather_df, prcp > 0), aes(x = prcp, fill = name)) +
  geom_density(alpha = 0.5)

# Panel 3: tmax over the year with a smooth trend per station (no standard
# error band), legend hidden.
tmax_date_p =
  ggplot(weather_df, aes(x = date, y = tmax, color = name)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  theme(legend.position = "none")

# Arrange the three plots side by side.
tmax_tmin_p + prcp_dens_p + tmax_date_p
## Warning: Removed 15 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).

# Put the first plot on top and the other two side by side on a second row.
tmax_tmin_p /(prcp_dens_p + tmax_date_p)
## Warning: Removed 15 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).

## Warning: Removed 3 rows containing missing values (geom_point).

Data manipulation

Control your factors.

# R stores a categorical variable as a factor (integer codes 1, 2, 3, ...)
# and maps the codes back to the category names when plotting.
# So changing the order in which categories appear in a plot (e.g. putting
# one of them first) is a data-manipulation step, not a ggplot step.
# Here fct_relevel() moves "Waikiki_HA" to the first position.
weather_df %>% 
  mutate(name = forcats::fct_relevel(factor(name), "Waikiki_HA")) %>% 
  ggplot(aes(x = name, y = tmax, fill = name)) +
  geom_violin(alpha = 0.5)
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).

What if I wanted densities for tmin and tmax simultaneously? (That is, the tmin and tmax density plots overlaid.)

# Overlay the tmin and tmax densities for Central Park: stack the two
# temperature columns into one `temperature` column, keyed by `observation`,
# then fill by that key.
weather_df %>% 
  filter(name == "CentralPark_NY") %>% 
  pivot_longer(
    cols = tmax:tmin,
    names_to = "observation",
    values_to = "temperature"
  ) %>% 
  ggplot(aes(x = temperature, fill = observation)) +
  geom_density(alpha = 0.5)

# Same overlaid tmin / tmax densities, now for every station, with one
# panel per station.
weather_df %>% 
  pivot_longer(
    cols = tmax:tmin,
    names_to = "observation",
    values_to = "temperature"
  ) %>% 
  ggplot(aes(x = temperature, fill = observation)) +
  geom_density(alpha = 0.5) +
  facet_grid(. ~ name)
## Warning: Removed 18 rows containing non-finite values (stat_density).